Load the required libraries.
library(tidyverse)
library(sf)
library(here)
library(readxl)
library(scales)
library(DT)
library(brms)
library(tidybayes)
library(patchwork)
library(marginaleffects)
library(ggrepel)
library(scico)
library(ggdensity)
library(ggpubr)
library(units)
library(glue)
library(ggh4x)
Functions that we will use throughout the script
#labeller for years
year_labels <- c(1950:1963)
#The Glasgow mass minuture chest X-ray campaign happened between 11th March and 12th April 1957
#Segment for graphs to match ACF period
acf_start <- decimal_date(ymd("1957-03-11"))
acf_end <- decimal_date(ymd("1957-04-12"))
Function for counterfactual plots
plot_counterfactual <- function(model_data, model, population_denominator, outcome, grouping_var=NULL, re_formula,...){
#labeller for years
year_labels <- c(1950:1964) #extra year for the extant of the x-axis
#The Glasgow mass minuture chest X-ray campaign happened between 11th March and 12th April 1957
#Segment for graphs to match ACF period
acf_start <- decimal_date(ymd("1957-03-11"))
acf_end <- decimal_date(ymd("1957-04-12"))
summary <- {{model_data}} %>%
select(year, year2, y_num, acf_period, {{population_denominator}}, {{outcome}}, {{grouping_var}}) %>%
add_epred_draws({{model}}, re_formula={{re_formula}}) %>%
group_by(year2, acf_period, {{grouping_var}}) %>%
mean_qi() %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000,
.epred_inc.lower = .epred.lower/{{population_denominator}}*100000,
.epred_inc.upper = .epred.upper/{{population_denominator}}*100000) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#create the counterfactual (no intervention), and summarise
counterfact <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, {{outcome}}) %>%
mutate(acf_period = "a. pre-acf"), re_formula={{re_formula}}) %>%
group_by(year2, acf_period, {{grouping_var}}) %>%
mean_qi() %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000,
.epred_inc.lower = .epred.lower/{{population_denominator}}*100000,
.epred_inc.upper = .epred.upper/{{population_denominator}}*100000) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Before Intervention",
acf_period=="c. post-acf" ~ "Post Intervention"))
#plot the intervention effect
p <- summary %>%
droplevels() %>%
ggplot() +
geom_vline(aes(xintercept=acf_start, linetype="Mass CXR screening intervention")) +
geom_vline(aes(xintercept=acf_end, linetype="Mass CXR screening intervention")) +
geom_ribbon(aes(ymin=.epred_inc.lower, ymax=.epred_inc.upper, x=year2, group = acf_period, fill=acf_period), alpha=0.5) +
geom_ribbon(data = counterfact %>% filter(year>=1956),
aes(ymin=.epred_inc.lower, ymax=.epred_inc.upper, x=year2, fill="Counterfactual"), alpha=0.5) +
geom_line(data = counterfact %>% filter(year>=1956),
aes(y=.epred_inc, x=year2, colour="Counterfactual")) +
geom_line(aes(y=.epred_inc, x=year2, group=acf_period, colour=acf_period)) +
geom_point(data = {{model_data}} %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Pre-ACF",
acf_period=="b. acf" ~ "ACF",
acf_period=="c. post-acf" ~ "Post-ACF")),
aes(y={{outcome}}, x=year2, shape=fct_relevel(acf_period,
"Pre-ACF",
"ACF",
"Post-ACF")), size=2) +
theme_grey() +
scale_y_continuous(labels=comma, limits =c(0,400)) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="Model estimates:", na.translate = F) +
scale_colour_manual(values = c("#DE0D92", "grey50", "#4D6CFA") , name="Model estimates:", na.translate = F) +
scale_shape_discrete(name="Empirical data (period):", na.translate = F) +
scale_linetype_manual(values = 2, name="") +
labs(
x = "",
y = "CNR (per 100,000)"
) +
guides(x = "axis_truncated", y = "axis_truncated") +
theme(legend.position = "bottom",
legend.box="vertical",
text = element_text(size=10),
axis.text.x = element_text(size=10, angle = 90, hjust=1, vjust=0.5),
legend.text = element_text(size=10),
legend.spacing.y = unit(0.1, 'cm'),
axis.line = element_line(colour = "black"))
facet_vars <- vars(...)
if (length(facet_vars) != 0) {
p <- p + facet_wrap(facet_vars)
}
p
}
Function for calculating measures of change over time (RR.peak, RR.level, RR.slope)
summarise_change <- function(model_data, model, population_denominator, grouping_var = NULL, re_formula = NULL) {
#functions for calculating RR.peak
#i.e. relative case notification rate in 1957 vs. counterfactual trend for 1957
grouping_var <- enquo(grouping_var)
if (!is.null({{grouping_var}})) {
#make the prediction matrix, conditional on whether we want random effects included or not.
out <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num, !!grouping_var) %>%
filter(y_num == 8),
acf_period = c("a. pre-acf", "b. acf")
)
} else {
out <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num) %>%
filter(y_num == 8),
acf_period = c("a. pre-acf", "b. acf")
)
}
peak_draws <- add_epred_draws(newdata = out,
object = {{model}},
re_formula = {{re_formula}}) %>%
mutate(epred_cnr = .epred/population_without_inst_ship*100000) %>%
group_by(.draw, !!grouping_var) %>%
summarise(estimate = last(epred_cnr)/first(epred_cnr)) %>%
ungroup() %>%
mutate(measure = "RR.peak")
peak_summary <- peak_draws %>%
group_by(!!grouping_var) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.peak")
#functions for calculating RR.level
#i.e. relative case notification rate in 1958 vs. counterfactual trend for 1958
if (!is.null({{grouping_var}})) {
out2 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num, !!grouping_var) %>%
filter(y_num == 9),
acf_period = c("a. pre-acf", "c. post-acf")
)
} else {
out2 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num) %>%
filter(y_num == 9),
acf_period = c("a. pre-acf", "c. post-acf")
)
}
level_draws <- add_epred_draws(newdata = out2,
object = {{model}},
re_formula = {{re_formula}}) %>%
arrange(y_num, .draw) %>%
mutate(epred_cnr = .epred/population_without_inst_ship*100000) %>%
group_by(.draw, !!grouping_var) %>%
summarise(estimate = last(epred_cnr)/first(epred_cnr)) %>%
ungroup() %>%
mutate(measure = "RR.level")
level_summary <- level_draws %>%
group_by(!!grouping_var) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.level")
#functions for calculating RR.slope
#i.e. relative change in case notification rate in 1958-1963 vs. counterfactual trend for 1959-1963
if (!is.null({{grouping_var}})) {
out3 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num, !!grouping_var) %>%
filter(y_num %in% c(9,14)),
acf_period = c("a. pre-acf", "c. post-acf")
)
} else {
out3 <- crossing({{model_data}} %>%
select({{population_denominator}}, y_num) %>%
filter(y_num %in% c(9,14)),
acf_period = c("a. pre-acf", "c. post-acf")
)
}
slope_draws <- add_epred_draws(newdata = out3,
object = {{model}},
re_formula = {{re_formula}}) %>%
arrange(y_num) %>%
ungroup() %>%
mutate(epred_cnr = .epred/population_without_inst_ship*100000) %>%
group_by(.draw, y_num, !!grouping_var) %>%
summarise(slope = last(epred_cnr)/first(epred_cnr)) %>%
ungroup() %>%
group_by(.draw, !!grouping_var) %>%
summarise(estimate = last(slope)/first(slope)) %>%
mutate(measure = "RR.slope")
slope_summary <- slope_draws %>%
group_by(!!grouping_var) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.slope")
#gather all the results into a named list
lst(peak_draws=peak_draws, peak_summary=peak_summary,
level_draws=level_draws, level_summary=level_summary,
slope_draws=slope_draws, slope_summary=slope_summary)
}
Function for calculating difference from counterfactual
calculate_counterfactual <- function(model_data, model, population_denominator, grouping_var=NULL, re_formula=NA){
#effect vs. counterfactual
counterfact <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}) %>%
mutate(acf_period = "a. pre-acf"),
re_formula = {{re_formula}}) %>%
group_by(.draw, year, {{grouping_var}}, acf_period) %>%
mutate(.epred_inc_counterf = .epred/{{population_denominator}}*100000, .epred_counterf=.epred) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, {{population_denominator}}, .draw, .epred_counterf, .epred_inc_counterf, {{grouping_var}})
#Calcuate case notification rate per draw, then summarise.
post_change <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, acf_period),
re_formula = {{re_formula}}) %>%
group_by(.draw, year, {{grouping_var}}, acf_period) %>%
mutate(.epred_inc = .epred/{{population_denominator}}*100000) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, {{population_denominator}}, {{grouping_var}}, .draw, .epred, .epred_inc, {{grouping_var}})
#for the overall period
counterfact_overall <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}) %>%
mutate(acf_period = "a. pre-acf"),
re_formula = {{re_formula}}) %>%
group_by(.draw, {{grouping_var}}) %>%
filter(year>1957) %>%
ungroup() %>%
select({{population_denominator}}, .draw, .epred, {{grouping_var}}) %>%
group_by(.draw, {{grouping_var}}) %>%
summarise(.epred_counterf = sum(.epred))
#Calcuate case notification rate per draw, then summarise.
post_change_overall <-
add_epred_draws(object = {{model}},
newdata = {{model_data}} %>%
select(year, year2, y_num, {{population_denominator}}, {{grouping_var}}, acf_period),
re_formula = {{re_formula}}) %>%
group_by(.draw, {{grouping_var}}) %>%
filter(year>1957) %>%
ungroup() %>%
select({{population_denominator}}, {{grouping_var}}, .draw, .epred) %>%
group_by(.draw, {{grouping_var}}) %>%
summarise(.epred = sum(.epred))
counter_post <-
left_join(counterfact, post_change) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf,
diff_inc100k = .epred_inc - .epred_inc_counterf,
rr_inc100k = .epred_inc/.epred_inc_counterf) %>%
group_by(year, {{grouping_var}}) %>%
mean_qi(cases_averted, pct_change, diff_inc100k, rr_inc100k) %>%
ungroup()
counter_post_overall <-
left_join(counterfact_overall, post_change_overall) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf) %>%
group_by({{grouping_var}}) %>%
mean_qi(cases_averted, pct_change) %>%
ungroup()
lst(counter_post, counter_post_overall)
}
Function for tidying up counterfactuals (mostly for making nice tables)
tidy_counterfactuals <- function(data){
data %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(year = as.character(year),
cases_averted = glue::glue("{cases_averted} ({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue::glue("{pct_change} ({pct_change.lower} to {pct_change.upper})"),
diff_inc = glue::glue("{diff_inc100k} ({diff_inc100k.lower} to {diff_inc100k.upper})"),
rr_inc = glue::glue("{rr_inc100k} ({rr_inc100k.lower} to {rr_inc100k.upper})"))
}
tidy_counterfactuals_overall <- function(data){
data %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(year = as.character(year),
cases_averted = glue::glue("{cases_averted} ({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue::glue("{pct_change} ({pct_change.lower} to {pct_change.upper})"))
}
Import datasets for analysis
Make a map of Glasgow wards
glasgow_wards_1951 <- st_read(here("mapping/glasgow_wards_1951.geojson"))
Reading layer `glasgow_wards_1951' from data source
`/Users/petermacpherson/Dropbox/Projects/Historical TB ACF 2023-11-28/Work/analysis/glasgow-cxr/mapping/glasgow_wards_1951.geojson'
using driver `GeoJSON'
Simple feature collection with 37 features and 3 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -4.393502 ymin: 55.77464 xmax: -4.070411 ymax: 55.92814
Geodetic CRS: WGS 84
#read in Scotland boundary
scotland <- st_read(here("mapping/Scotland_boundary/Scotland boundary.shp"))
Reading layer `Scotland boundary' from data source
`/Users/petermacpherson/Dropbox/Projects/Historical TB ACF 2023-11-28/Work/analysis/glasgow-cxr/mapping/Scotland_boundary/Scotland boundary.shp'
using driver `ESRI Shapefile'
Simple feature collection with 1 feature and 1 field
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: 5513 ymin: 530249 xmax: 470332 ymax: 1220302
Projected CRS: OSGB36 / British National Grid
#make a bounding box for Glasgow
bbox <- st_bbox(glasgow_wards_1951) |> st_as_sfc()
#plot scotland with a bounding box around the City of Glasgow
scotland_with_bbox <- ggplot() +
geom_sf(data = scotland, fill="antiquewhite") +
geom_sf(data = bbox, colour = "#C60C30", fill="antiquewhite") +
theme_void() +
theme(panel.border = element_rect(colour = "grey78", fill=NA, linewidth = 0.5),
panel.background = element_rect(fill = "#EAF7FA", size = 0.3))
#plot the wards
#note we tidy up some names to fit on map
glasgow_ward_map <- glasgow_wards_1951 %>%
mutate(ward = case_when(ward=="Shettleston and Tollcross" ~ "Shettleston and\nTollcross",
ward=="Partick (West)" ~ "Partick\n(West)",
ward=="Partick (East)" ~ "Partick\n(East)",
ward=="North Kelvin" ~ "North\nKelvin",
ward=="Kinning Park" ~ "Kinning\nPark",
TRUE ~ ward)) %>%
ggplot() +
geom_sf(aes(fill=division)) +
geom_sf_label(aes(label = ward), size=3, fill=NA, label.size = NA, colour="black") +
#scale_colour_identity() +
scale_fill_brewer(palette = "Set3", name="City of Glasgow Division") +
theme_grey() +
labs(x="",
y="",
fill="Division") +
theme(legend.position = "top",
panel.border = element_rect(colour = "grey78", fill=NA, linewidth = 0.5),
panel.background = element_rect(fill = "antiquewhite", size = 0.3),
panel.grid.major = element_line(color = "grey78")) +
guides(fill=guide_legend(title.position = "top", title.hjust = 0.5, title.theme = element_text(face="bold")))
#add the map of scotland as an inset
glasgow_ward_map + inset_element(scotland_with_bbox, 0.75, 0, 1, 0.4)
ggsave(here("figures/s1.png"), height=10, width = 12)
NA
NA
Calculate areas per geographical unit
sf_use_s2(FALSE) #https://github.com/r-spatial/sf/issues/1762
glasgow_wards_1951 <- glasgow_wards_1951 %>%
mutate(area = st_area(glasgow_wards_1951))
glasgow_wards_1951$area_km <- units::set_units(glasgow_wards_1951$area, km^2)
Make division shape files, and calculate area (stopped working, need to fix!)
# glasgow_divisions_1951 <- glasgow_wards_1951 %>%
# group_by(division) %>%
# summarize(geometry = st_union(geometry)) %>%
# nngeo::st_remove_holes() %>%
# mutate(area = st_area(glasgow_divisions_1951))
#
# glasgow_divisions_1951$area_km <- units::set_units(glasgow_divisions_1951$area, km^2)
Load in the datasets for denonomiators, and check for consistency.
overall_pops <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "overall_population")
overall_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
overall_pops <- overall_pops %>%
mutate(year2 = year+0.5)
Note, we have three population estimates:
(Population in shipping is estimated from the 1951 census, so is the same for most years)
First, plot the total population
overall_pops %>%
ggplot() +
geom_area(aes(y=total_population, x=year2), alpha=0.5, colour = "mediumseagreen", fill="mediumseagreen") +
geom_point(aes(y=total_population, x=year2), colour = "mediumseagreen") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(
title = "Glasgow Corporation: total population",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist()
NA
NA
Now the population excluding institutionalised and shipping population
overall_pops %>%
ggplot() +
geom_area(aes(y=population_without_inst_ship, x=year2), alpha=0.5, colour = "purple", fill="purple") +
geom_point(aes(y=population_without_inst_ship, x=year2), colour = "purple") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(
title = "Glasgow Corporation: population excluding institutionalised and shipping",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist()
NA
NA
There are 5 Divisions containing 37 Wards in the Glasgow Corporation, with consistent boundaries over time.
#look-up table for divisions and wards
ward_lookup <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "divisions_wards")
ward_pops <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "ward_population")
ward_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
ward_pops <- ward_pops %>%
mutate(year2 = year+0.5)
#Get the Division population
division_pops <- ward_pops %>%
group_by(division, year) %>%
summarise(population_without_inst_ship = sum(population_without_inst_ship, na.rm = TRUE),
institutions = sum(institutions, na.rm = TRUE),
shipping = sum(shipping, na.rm = TRUE),
total_population = sum(total_population, na.rm = TRUE))
`summarise()` has grouped output by 'division'. You can override using the `.groups` argument.
division_pops %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
Plot the overall population by Division and Ward
division_pops %>%
mutate(year2 = year+0.5) %>%
ggplot() +
geom_area(aes(y=total_population, x=year2, colour=division, fill=division), alpha=0.8) +
geom_point(aes(y=total_population, x=year2, colour=division)) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
facet_wrap(division~.) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_brewer(palette = "Set3", name = "") +
scale_colour_brewer(palette = "Set3", name = "") +
labs(
title = "Glasgow Corporation: total population by Division",
subtitle = "1950 to 1963",
x = "Year",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
ward_pops %>%
ggplot() +
geom_area(aes(y=total_population, x=year2, colour=division, fill=division)) +
geom_point(aes(y=total_population, x=year2, colour=division), colour="black") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
facet_wrap(ward~., ncol=6) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
scale_fill_brewer(palette = "Set3", name="Division") +
scale_colour_brewer(palette = "Set3", name = "Division") +
labs(
x = "",
y = "Population",
caption = "Mid-year estimates\nMass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
ggsave(here("figures/s2.png"), height=14, width=12)
Approximately, how many person-years of follow-up do we have?
overall_pops %>%
ungroup() %>%
summarise(across(year, length, .names = "years"),
across(c(population_without_inst_ship, total_population), sum)) %>%
mutate(across(where(is.double), comma)) %>%
datatable()
NA
NA
Change in population by ward
ward_pops %>%
group_by(ward) %>%
summarise(pct_change_pop = (last(population_without_inst_ship) - first(population_without_inst_ship))/first(population_without_inst_ship)) %>%
mutate(pct_change_pop = percent(pct_change_pop)) %>%
arrange(pct_change_pop) %>%
datatable()
NA
NA
NA
Output population density by ward and divison for regression modelling
Wards first
(stopped working, need to fix)
# ward_covariates <- glasgow_wards_1951 %>%
# left_join(ward_pops) %>%
# mutate(people_per_km_sq = as.double(population_without_inst_ship/area_km))
#
# #plot it out
#
# ward_covariates %>%
# ggplot() +
# geom_sf(aes(fill=people_per_km_sq)) +
# facet_wrap(year~., ncol=7) +
# scale_fill_viridis_c(option="A") +
# theme(legend.position = "bottom",
# axis.text.x = element_text(angle = 45, hjust=1))
#
# ggsave(here("figures/ward_pop_density.png"), width=10)
#
# write_rds(ward_covariates, here("populations/ward_covariates.rds"))
Now divisions first
(stopped working, need to fix)
# division_covariates <- glasgow_divisions_1951 %>%
# left_join(division_pops) %>%
# mutate(people_per_km_sq = as.double(total_population/area_km))
#
# #plot it out
#
# division_covariates %>%
# ggplot() +
# geom_sf(aes(fill=people_per_km_sq)) +
# geom_sf_label(aes(label = division), size=3, fill=NA, label.size = NA, colour="black", family = "Segoe UI") +
# facet_wrap(year~., ncol=7) +
# scale_fill_viridis_c(option="G") +
# theme(legend.position = "bottom",
# axis.text.x = element_text(angle = 45, hjust=1))
#
# ggsave(here("figures/division_pop_density.png"), width=10)
#
# write_rds(division_covariates, here("populations/division_covariates.rds"))
age_sex <- read_xlsx(path = "2023-11-28_glasgow-acf.xlsx", sheet = "age_sex_population") %>%
pivot_longer(cols = c(male, female),
names_to = "sex")
#collapse down to smaller age groups to be manageable
age_sex <- age_sex %>%
ungroup() %>%
mutate(age = case_when(age == "0 to 4" ~ "00 to 04",
age == "5 to 9" ~ "05 to 14",
age == "10 to 14" ~ "05 to 14",
age == "15 to 19" ~ "15 to 24",
age == "20 to 24" ~ "15 to 24",
age == "25 to 29" ~ "25 to 34",
age == "30 to 34" ~ "25 to 34",
age == "35 to 39" ~ "35 to 44",
age == "40 to 44" ~ "35 to 44",
age == "45 to 49" ~ "45 to 59",
age == "50 to 54" ~ "45 to 59",
age == "55 to 59" ~ "45 to 59",
TRUE ~ "60 & up")) %>%
group_by(year, age, sex) %>%
mutate(value = sum(value)) %>%
ungroup()
m_age_sex <- lm(value ~ splines::ns(year, knots = 3)*age*sex, data = age_sex)
summary(m_age_sex)
Warning: essentially perfect fit: summary may be unreliable
Call:
lm(formula = value ~ splines::ns(year, knots = 3) * age * sex,
data = age_sex)
Residuals:
Min 1Q Median 3Q Max
-1.185e-10 0.000e+00 0.000e+00 0.000e+00 1.185e-10
Coefficients: (14 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 5.222e+04 2.040e-10 2.559e+14 <2e-16 ***
splines::ns(year, knots = 3)1 -8.043e+03 4.071e-10 -1.976e+13 <2e-16 ***
splines::ns(year, knots = 3)2 NA NA NA NA
age05 to 14 3.669e+04 2.499e-10 1.468e+14 <2e-16 ***
age15 to 24 -3.893e+03 2.499e-10 -1.558e+13 <2e-16 ***
age25 to 34 -3.996e+04 2.499e-10 -1.599e+14 <2e-16 ***
age35 to 44 -4.230e+04 2.499e-10 -1.693e+14 <2e-16 ***
age45 to 59 5.459e+04 2.356e-10 2.317e+14 <2e-16 ***
age60 & up 7.533e+04 2.204e-10 3.418e+14 <2e-16 ***
sexmale 3.374e+03 2.886e-10 1.169e+13 <2e-16 ***
splines::ns(year, knots = 3)1:age05 to 14 -1.863e+03 4.985e-10 -3.737e+12 <2e-16 ***
splines::ns(year, knots = 3)2:age05 to 14 NA NA NA NA
splines::ns(year, knots = 3)1:age15 to 24 7.533e+04 4.985e-10 1.511e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age15 to 24 NA NA NA NA
splines::ns(year, knots = 3)1:age25 to 34 1.325e+05 4.985e-10 2.658e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age25 to 34 NA NA NA NA
splines::ns(year, knots = 3)1:age35 to 44 1.380e+05 4.985e-10 2.769e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age35 to 44 NA NA NA NA
splines::ns(year, knots = 3)1:age45 to 59 3.474e+03 4.700e-10 7.390e+12 <2e-16 ***
splines::ns(year, knots = 3)2:age45 to 59 NA NA NA NA
splines::ns(year, knots = 3)1:age60 & up -8.453e+04 4.397e-10 -1.923e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age60 & up NA NA NA NA
splines::ns(year, knots = 3)1:sexmale -1.994e+03 5.757e-10 -3.464e+12 <2e-16 ***
splines::ns(year, knots = 3)2:sexmale NA NA NA NA
age05 to 14:sexmale 1.053e+04 3.534e-10 2.980e+13 <2e-16 ***
age15 to 24:sexmale 2.352e+04 3.534e-10 6.656e+13 <2e-16 ***
age25 to 34:sexmale 1.355e+04 3.534e-10 3.833e+13 <2e-16 ***
age35 to 44:sexmale -1.727e+03 3.534e-10 -4.888e+12 <2e-16 ***
age45 to 59:sexmale 2.774e+03 3.332e-10 8.324e+12 <2e-16 ***
age60 & up:sexmale -7.761e+04 3.117e-10 -2.490e+14 <2e-16 ***
splines::ns(year, knots = 3)1:age05 to 14:sexmale -2.049e+04 7.051e-10 -2.906e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age05 to 14:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age15 to 24:sexmale -6.780e+04 7.051e-10 -9.616e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age15 to 24:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age25 to 34:sexmale -3.804e+04 7.051e-10 -5.396e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age25 to 34:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age35 to 44:sexmale -1.171e+04 7.051e-10 -1.661e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age35 to 44:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age45 to 59:sexmale -3.473e+04 6.647e-10 -5.224e+13 <2e-16 ***
splines::ns(year, knots = 3)2:age45 to 59:sexmale NA NA NA NA
splines::ns(year, knots = 3)1:age60 & up:sexmale 1.056e+05 6.218e-10 1.698e+14 <2e-16 ***
splines::ns(year, knots = 3)2:age60 & up:sexmale NA NA NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 3.074e-11 on 44 degrees of freedom
Multiple R-squared: 1, Adjusted R-squared: 1
F-statistic: 6.006e+29 on 27 and 44 DF, p-value: < 2.2e-16
age_levels <- age_sex %>% select(age) %>% distinct() %>% pull()
age_sex_nd <-
crossing(
age=age_levels,
sex=c("male", "female"),
year = 1950:1963
)
pred_pops <- age_sex_nd %>% modelr::add_predictions(m_age_sex)
Warning: prediction from a rank-deficient fit may be misleading
pred_pops %>%
ggplot(aes(x=year, y=pred, colour=age)) +
geom_line() +
geom_point() +
facet_grid(sex~.) +
scale_y_continuous(labels = comma, limits = c(0, 125000))
#How well do they match up with our overall populations?
pred_pops %>%
group_by(year) %>%
summarise(sum_pred_pop = sum(pred)) %>%
right_join(overall_pops) %>%
select(year, sum_pred_pop, population_without_inst_ship, total_population) %>%
pivot_longer(cols = c(sum_pred_pop, population_without_inst_ship, total_population)) %>%
ggplot(aes(x=year, y=value, colour=name)) +
geom_point() +
scale_y_continuous(labels = comma, limits = c(800000, 1250000))
Joining with `by = join_by(year)`
pred_pops %>%
group_by(year, sex) %>%
summarise(sum = sum(pred)) %>%
group_by(year) %>%
mutate(sex_ratio = first(sum)/last(sum))
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
Population pyramids
label_abs <- function(x) {
comma(abs(x))
}
pred_pops %>%
ungroup() %>%
group_by(year) %>%
mutate(year_pop = sum(pred),
age_sex_pct = percent(pred/year_pop, accuracy=0.1)) %>%
mutate(sex = case_when(sex=="male" ~ "Male",
sex=="female" ~ "Female")) %>%
ggplot(
aes(x = age, fill = sex,
y = ifelse(test = sex == "Female",yes = -pred, no = pred))) +
geom_bar(stat = "identity") +
geom_text(aes(label = age_sex_pct),
position= position_stack(vjust=0.5), colour="black", size=2.5) +
facet_wrap(year~., ncol=7) +
coord_flip() +
scale_y_continuous(labels = label_abs) +
scale_fill_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
theme_ggdist() +
theme(axis.text.x = element_text(angle=90, hjust = 1, vjust=0.5),
legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="", y="")
ggsave(here("figures/s3.png"), width=10)
Saving 10 x 4.5 in image
Not perfect, but resonably good. But ahhhhh… the age groups don’t align with the case notification age groups! Come back to think about this later.
Import the tuberculosis cases dataset
Overall, by year.
cases_by_year <- read_xlsx("2023-11-28_glasgow-acf.xlsx", sheet = "by_year")
cases_by_year%>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
#shift year to midpoint
cases_by_year <- cases_by_year %>%
mutate(year2 = year+0.5)
Plot the overall number of case notified per year, by pulmonary and extra pulmonary classification.
cases_by_year %>%
select(-total_notifications, -year) %>%
pivot_longer(cols = c(pulmonary_notifications, `non-pulmonary_notifications`)) %>%
mutate(name = case_when(name == "pulmonary_notifications" ~ "Pulmonary TB",
name == "non-pulmonary_notifications" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=value, x=year2, group = name, fill=name), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
Read in the datasets and merge together.
#list all the sheets
all_sheets <- excel_sheets("2023-11-28_glasgow-acf.xlsx")
#get the ward sheets
ward_sheets <- enframe(all_sheets) %>%
filter(grepl("by_ward", value)) %>%
pull(value)
cases_by_ward_sex_year <- map_df(ward_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
cases_by_ward_sex_year %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
Aggregate together to get cases by division
cases_by_division <- cases_by_ward_sex_year %>%
left_join(ward_lookup) %>%
group_by(division, year, tb_type) %>%
summarise(cases = sum(cases, na.rm = TRUE))
Joining with `by = join_by(ward)``summarise()` has grouped output by 'division', 'year'. You can override using the `.groups` argument.
#shift year to midpoint
cases_by_division <- cases_by_division %>%
mutate(year2 = year+0.5) %>%
ungroup()
cases_by_division %>%
select(-year2) %>%
select(year, everything()) %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
cases_by_division %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=cases, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(division~., scales = "free_y") +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications by Division",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme_ggdist() +
theme(legend.position = "bottom")
cases_by_ward <- cases_by_ward_sex_year %>%
group_by(ward, year, tb_type) %>%
summarise(cases = sum(cases, na.rm = TRUE)) %>%
ungroup()
`summarise()` has grouped output by 'ward', 'year'. You can override using the `.groups` argument.
cases_by_ward %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
select(year, everything()) %>%
datatable()
#shift year to midpoint
cases_by_ward <- cases_by_ward %>%
mutate(year2 = year+0.5)
cases_by_ward %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=cases, x=year2, group = tb_type, fill=tb_type), alpha=0.8) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(ward~., scales = "free_y") +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis notifications by Ward",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Number of cases",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme(legend.position = "bottom")
NA
NA
As we don’t have denominators, we will just model the change in counts.
#list all the sheets
all_sheets <- excel_sheets("2023-11-28_glasgow-acf.xlsx")
#get the ward sheets
age_sex_sheets <- enframe(all_sheets) %>%
filter(grepl("by_age_sex", value)) %>%
pull(value)
cases_by_age_sex <- map_df(age_sex_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
cases_by_age_sex %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
NA
What percentage of adults (15+ participated in the intervention in 1957)?
#
# pred_pops %>%
# ungroup() %>%
# filter(year==1957) %>%
# filter(age != "00 to 04",
# age != "05 to 14") %>%
# summarise(total_pop = sum(pred)) %>%
# mutate(cxr_screened = 622349) %>%
# mutate(pct_pop_cxr_screened = percent(cxr_screened/total_pop))
#
# pred_pops %>%
# ungroup() %>%
# filter(year==1957) %>%
# filter(age != "00 to 04",
# age != "05 to 14") %>%
# summarise(total_pop = sum(pred), .by=sex) %>%
# mutate(cxr_screened = c(340474, 281875)) %>%
# mutate(pct_pop_cxr_screened = percent(cxr_screened/total_pop))
Note that in the Report of Sir Kenneth Cowan, we have the following estimates of participation (we will use these for the manuscript, as they are not based on my estimates)
male_adult_resident_participation <- 281875
female_adult_resident_participation <- 340474
male_adult_resident_population <- 381713
female_adult_resident_population <- 437588
#overall participation
(male_adult_resident_participation+female_adult_resident_participation)/(male_adult_resident_population+female_adult_resident_population)
[1] 0.7596097
#male participation
male_adult_resident_participation/male_adult_resident_population
[1] 0.7384475
#female participation
female_adult_resident_participation/female_adult_resident_population
[1] 0.7780698
Look at uptake of screening by age and sex
uptake_age_sex <- read_xlsx("2024-03-26_mass_xray_uptake.xlsx", sheet = "uptake_age_sex")
uptake_age_sex %>%
mutate(uptake = examined/resident_population) %>%
mutate(examined_l = comma(examined),
resident_population_l = comma(resident_population),
uptake_l = percent(uptake, accuracy=0.1)) %>%
mutate(label = glue("{examined_l}/{resident_population_l} ({uptake_l})")) %>%
filter(age !="00-14") %>%
mutate(sex = case_when(sex=="m" ~ "Male",
sex=="f" ~ "Female")) %>%
ggplot(aes(x=age, y=uptake, group=sex, fill=sex)) +
geom_bar(stat = "identity", position = "dodge") +
geom_text(aes(label=uptake_l), position = position_dodge(width=0.85),vjust=2) +
scale_y_continuous(labels=percent) +
scale_fill_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA)) +
labs(x="", y="")
ggsave(here("figures/s4.png"))
Saving 7.29 x 4.5 in image
Uptake by division
uptake_division <- read_xlsx("2024-03-26_mass_xray_uptake.xlsx", sheet = "uptake_division")
division_pops %>%
filter(year==1957) %>%
select(division, population_without_inst_ship) %>%
left_join(uptake_division) %>%
mutate(pct_pop_examined = examined/population_without_inst_ship)
Joining with `by = join_by(division)`
Now calculate case notification rates per 100,000 population
Merge the notification and population denominator datasets together.
Here we need to include the whole population (with shipping and institutions) as they are included in the notifications.
overall_inc <- overall_pops %>%
left_join(cases_by_year)
Joining with `by = join_by(year, year2)`
overall_inc <- overall_inc %>%
mutate(inc_pulm_100k = pulmonary_notifications/total_population*100000,
inc_ep_100k = `non-pulmonary_notifications`/total_population*100000,
inc_100k = total_notifications/total_population*100000)
overall_inc %>%
select(year, inc_100k, inc_pulm_100k, inc_ep_100k) %>%
mutate_at(.vars = vars(inc_100k, inc_pulm_100k, inc_ep_100k),
.funs = funs(round)) %>%
datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
overall_inc %>%
select(year2, inc_pulm_100k, inc_ep_100k) %>%
pivot_longer(cols = c(inc_pulm_100k, `inc_ep_100k`)) %>%
mutate(name = case_when(name == "inc_pulm_100k" ~ "Pulmonary TB",
name == "inc_ep_100k" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=value, x=year2, group = name, fill=name), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis case notification rate",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Case notification rate (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
NA
Change in case notification rates pre-intervention
#pre-ACF
overall_inc %>%
filter(year %in% 1950:1956) %>%
summarise(change = (((last(inc_pulm_100k)-first(inc_pulm_100k))/first(inc_pulm_100k))/7)*100)
#post-ACF
overall_inc %>%
filter(year %in% 1958:1963) %>%
summarise(change = (((last(inc_pulm_100k)-first(inc_pulm_100k))/first(inc_pulm_100k))/6)*100)
NA
division_inc <- division_pops %>%
left_join(cases_by_division)
Joining with `by = join_by(division, year)`
division_inc <- division_inc %>%
mutate(inc_100k = cases/total_population*100000)
division_inc %>%
select(year, division, tb_type, inc_100k) %>%
mutate_at(.vars = vars(inc_100k),
.funs = funs(round)) %>%
datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
division_inc %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=inc_100k, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(division~.) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis case notification rate, by Division",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Case notification rate (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme_ggdist() +
theme(legend.position = "bottom")
NA
NA
NA
Here we will filter out the institutions and harbour from the denominators, as we don’t have reliable population denominators for them.
ward_inc <- ward_pops %>%
left_join(cases_by_ward)
Joining with `by = join_by(ward, year, year2)`
ward_inc <- ward_inc %>%
mutate(inc_100k = cases/population_without_inst_ship*100000)
ward_inc %>%
select(year, ward, tb_type, inc_100k) %>%
mutate_at(.vars = vars(inc_100k),
.funs = funs(round)) %>%
datatable()
Warning: `funs()` was deprecated in dplyr 0.8.0.
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
ward_inc %>%
mutate(tb_type = case_when(tb_type == "Pulmonary" ~ "Pulmonary TB",
tb_type == "Non-Pulmonary" ~ "Extra-pulmonary TB")) %>%
ggplot() +
geom_area(aes(y=inc_100k, x=year2, group = tb_type, fill=tb_type), alpha=0.5) +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels,
guide = guide_axis(angle = 90)) +
facet_wrap(ward~.) +
scale_fill_brewer(palette = "Set1", name="") +
labs(
title = "Glasgow Corporation: Tuberculosis case notification rate, by Ward",
subtitle = "1950 to 1963, by TB disease classification",
x = "Year",
y = "Incidence (per 100,000)",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: extra-pulmonary TB cases by Division/Ward not reported in 1962-1963"
) +
theme(legend.position = "bottom")
NA
NA
NA
NA
On a map
st_as_sf(left_join(ward_inc, glasgow_wards_1951)) %>%
filter(tb_type=="Pulmonary") %>%
ggplot() +
geom_sf(aes(fill=inc_100k)) +
facet_wrap(year~., ncol = 7) +
scale_fill_viridis_c(name="Case notification rate (per 100,000)",
option = "A") +
theme_ggdist() +
theme(legend.position = "top",
legend.key.width = unit(2, "cm"),
panel.border = element_rect(colour = "grey78", fill=NA)) +
guides(fill=guide_colorbar(title.position = "top"))
Joining with `by = join_by(division, ward, ward_number)`
Import the TB mortality data.
First, overall deaths. Note that in the original reports, we have a pulmonary TB death rate per million for all years, and numbers of pulmonary TB deaths for each year apart from 1950.
#get the overall mortality sheets
deaths_sheets <- enframe(all_sheets) %>%
filter(grepl("deaths", value)) %>%
pull(value)
overall_deaths <- map_df(deaths_sheets, ~read_xlsx(path = "2023-11-28_glasgow-acf.xlsx",
sheet = .))
overall_deaths %>%
mutate(across(where(is.numeric) & !(year), ~comma(.))) %>%
datatable()
NA
NA
NA
Plot the raw numbers of pulmonary deaths
overall_deaths %>%
ggplot(aes(x=year, y=pulmonary_deaths)) +
geom_line(colour = "#DE0D92") +
geom_point(colour = "#DE0D92") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
labs(y="Pulmonary TB deaths per year",
x = "Year",
title = "Numbers of pulmonary TB deaths",
subtitle = "Glasgow, 1950-1963",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)\nNote: no data for 1950") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA))
NA
NA
Now the incidence of pulmonary TB death
overall_deaths %>%
ggplot(aes(x=year, y=pulmonary_death_rate_per_100k)) +
geom_line(colour = "#4D6CFA") +
geom_point(colour = "#4D6CFA") +
geom_vline(aes(xintercept=acf_start), linetype=3) +
geom_vline(aes(xintercept=acf_end), linetype=3) +
scale_y_continuous(labels=comma) +
scale_x_continuous(labels = year_labels,
breaks = year_labels) +
labs(y="Annual incidence of death (per 100,000)",
x = "Year",
caption = "Mass miniature X-ray campaign period between dashed lines (11th March-12th April 1957)") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA))
ggsave(here("figures/s8.png"), width=10)
Saving 10 x 4.5 in image
Make Table 1 here, and save for publication.
overall_pops %>%
select(year, total_population) %>%
left_join(overall_inc %>%
select(year,
pulmonary_notifications, inc_pulm_100k,
`non-pulmonary_notifications`, inc_ep_100k,
total_notifications, inc_100k)) %>%
left_join(overall_deaths %>%
select(year,
pulmonary_deaths, pulmonary_death_rate_per_100k)) %>%
mutate(percent_pulmonary = percent(pulmonary_notifications/(total_notifications ), accuracy=0.1)) %>%
mutate(across(where(is.numeric) & !(year), ~round(., digits=1))) %>%
mutate(across(where(is.numeric) & !(year), ~comma(.)))
Joining with `by = join_by(year)`Joining with `by = join_by(year)`
Comparison fo age-sex distribution of cases in 1950-1956 vs. 1957
label_abs2 <- function(x) {
percent(abs(x))
}
cases_by_age_sex %>%
ungroup() %>%
filter(tb_type=="Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
group_by(acf_period, age, sex) %>%
summarise(cases = sum(cases)) %>%
ungroup() %>%
group_by(acf_period) %>%
mutate(period_total = sum(cases)) %>%
mutate(pct = cases/period_total) %>%
mutate(pct2 = case_when(sex=="F" ~ -pct,
TRUE ~ pct)) %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
mutate(acf_period = case_when(acf_period=="a. pre-acf" ~ "Pre-ACF",
acf_period=="b. acf" ~ "ACF",
acf_period=="c. post-acf" ~ "Post-ACF")) %>%
ggplot() +
geom_vline(aes(xintercept=0), linetype=2) +
geom_point(aes(x=pct2,y=age, colour=fct_relevel(acf_period,
"Pre-ACF",
"ACF",
"Post-ACF")), stat="identity") +
scale_x_continuous(labels=label_abs2, limits = c(-0.2, 0.2)) +
scale_colour_manual(values = c("#DE0D92", "grey50", "#4D6CFA")) +
theme_grey(base_family = "Aptos") +
labs(x= "<- Female Percent of cases Male ->",
y="") +
theme(legend.title = element_blank(),
legend.position = "bottom")
`summarise()` has grouped output by 'acf_period', 'age'. You can override using the `.groups` argument.
ggsave(here("figures/s5.png"))
Saving 7.29 x 4.5 in image
Prepare the datasets for modelling
mdata <- ward_inc %>%
filter(tb_type=="Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
group_by(ward) %>%
mutate(y_num = row_number()) %>%
ungroup()
mdata_extrapulmonary <- ward_inc %>%
filter(tb_type=="Non-Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
group_by(ward) %>%
mutate(y_num = row_number()) %>%
ungroup() %>%
filter(year<=1961) #no data for 1962 and 1963
#scaffold for overall predictions
overall_scaffold <- mdata %>%
select(year, year2, y_num, acf_period, population_without_inst_ship, ward, cases) %>%
group_by(year, year2, y_num, acf_period) %>%
summarise(population_without_inst_ship = sum(population_without_inst_ship),
cases = sum(cases)) %>%
ungroup() %>%
mutate(inc_100k = cases/population_without_inst_ship*100000) %>%
left_join(mdata_extrapulmonary %>% group_by(year) %>%
summarise(cases_extrapulmonary = sum(cases))) %>%
mutate(inc_100k_extrapulmonary = cases_extrapulmonary/population_without_inst_ship*100000)
`summarise()` has grouped output by 'year', 'year2', 'y_num'. You can override using the `.groups` argument.Joining with `by = join_by(year)`
This models the case notification rate over time, with a step change for the intervention, and slope change after the intervention.
Work on the priors a bit. We will build up from less complex to more complex.
at the intercept, we expect somewhere around 2500. We will set the standard deviation to both 0.5 and 1 to check what it looks like
#
# c(prior(lognormal(7.600902, 0.5)), #log(2500) = 7.600902
# prior(lognormal(7.600902, 1))) %>%
# parse_dist() %>%
#
# ggplot(aes(y = prior, dist = .dist, args = .args)) +
# stat_halfeye(.width = c(.5, .95)) +
# scale_y_discrete(NULL, labels = str_c("lognormal(log(2000), ", c(0.5, 1), ")"),
# expand = expansion(add = 0.1)) +
# xlab(expression(exp(italic(p)(beta[0])))) +
# coord_cartesian(xlim = c(0,15000))
#
#
# prior(gamma(1, 0.01)) %>%
# parse_dist() %>%
# ggplot(aes(y=prior, dist = .dist, args = .args)) +
# stat_halfeye(.width = c(0.5, 0.95))
#
# #now fit to a model, and plot some prior realisations
#
# m_prior1 <- brm(
# cases ~ 0 + Intercept,
# family = negbinomial(),
# data = overall_scaffold,
# sample_prior = "only",
# prior = prior(normal(log(2000), 0.5), class = b, coef = Intercept) +
# prior(gamma(1, 0.01), class = shape)
# )
#
# add_epred_draws(object=m_prior1,
# newdata = tibble(intercept=1)) %>%
# ggplot(aes(x=intercept, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(labels = comma)
Now try to add in a term for the effect of y_num. We anticpate that the number of cases will decline by about 1-5% per year. However, as we are pretty uncertain about this, we will just encode a weakly regularising prior to restrict the year size to sensible ranges.
#
#
# m_prior2 <- brm(
# cases ~ 0 + Intercept + y_num,
# family = negbinomial(),
# data = overall_scaffold,
# sample_prior = "only",
# prior = prior(normal(log(2000), 0.5), class = b, coef = Intercept) +
# prior(gamma(1, 0.01), class = shape) +
# prior(normal(0, 0.01), class = b, coef = y_num)
# )
#
# add_epred_draws(object=m_prior2,
# newdata = overall_scaffold) %>%
# ggplot(aes(x=year, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(label=comma)
Now we want to add in a prior for the effect of the acf_intervention. We anticipate the peak to be anywhere between no effect, and a tripling
#
# m_prior3 <- brm(
# cases ~ 0 + Intercept + y_num + acf_period,
# family = negbinomial(),
# data = overall_scaffold,
# sample_prior = "only",
# prior = prior(normal(log(2000), 0.5), class = b, coef = Intercept) +
# prior(gamma(1, 0.01), class = shape) +
# prior(normal(0, 0.01), class = b, coef = y_num) +
# prior(normal(0, 0.001), class = b)
# )
#
#
# add_epred_draws(object=m_prior3,
# newdata = overall_scaffold) %>%
# ggplot(aes(x=year, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(labels = comma)
#
Now we look and see what it looks like with the interactions
#
# m_prior4 <- brm(
# cases ~ 0 + Intercept + y_num + acf_period + y_num:acf_period,
# family = negbinomial(),
# data = overall_scaffold,
# sample_prior = "only",
# prior = prior(normal(log(2500), 1), class = b, coef = Intercept) +
# prior(gamma(1, 0.01), class = shape) +
# prior(normal(0, 0.01), class = b)
# )
#
# add_epred_draws(object=m_prior4,
# newdata = overall_scaffold) %>%
# ggplot(aes(x=year, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(label=comma)
#
#
Now try adding in the random intercepts
# c(prior(lognormal(3.912023, 0.5)), #log(50) = 3.912023
# prior(lognormal(3.912023, 1))) %>%
# parse_dist() %>%
#
# ggplot(aes(y = prior, dist = .dist, args = .args)) +
# stat_halfeye(.width = c(.5, .95)) +
# scale_y_discrete(NULL, labels = str_c("lognormal(log(50), ", c(0.5, 1), ")"),
# expand = expansion(add = 0.1)) +
# xlab(expression(exp(italic(p)(beta[0])))) +
# coord_cartesian(xlim = c(0,400))
#
#
# m_prior5 <- brm(
# cases ~ y_num + acf_period + y_num:acf_period + ( 1 | ward),
# family = negbinomial(),
# data = mdata,
# sample_prior = "only",
# prior = prior(normal(log(50), 1), class = Intercept) +
# prior(gamma(1, 0.01), class = shape) +
# prior(normal(0, 0.01), class = b) +
# prior(exponential(1), class=sd)
# )
#
#
# add_epred_draws(object=m_prior5,
# newdata = mdata,
# re_formula = NA) %>%
# ggplot(aes(x=year, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(label=comma)
#
# add_epred_draws(object=m_prior5,
# newdata = mdata,
# re_formula = NA) %>%
# ggplot(aes(x=year, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(label=comma) +
# facet_wrap(ward~.)
And add in the random slopes
#
# m_prior6 <- brm(
# cases ~ 1 + y_num + acf_period + y_num:acf_period + (1 + y_num*acf_period | ward),
# family = negbinomial(),
# data = mdata,
# sample_prior = "only",
# prior = prior(gamma(1, 0.01), class = shape) +
# prior(normal(0, 0.1), class = b) +
# prior(exponential(1), class=sd) +
# prior(lkj(2), class=cor)
# )
#
#
#
# m_prior6 <- brm(
# cases ~ 0 + Intercept + y_num + acf_period + y_num:acf_period + ( y_num*acf_period | ward),
# family = negbinomial(),
# data = mdata,
# sample_prior = "only",
# prior = prior(normal(log(50), 1), class = b, coef = Intercept) +
# prior(gamma(1, 0.01), class = shape) +
# prior(normal(0, 0.01), class = b) +
# prior(exponential(100), class=sd) +
# prior(lkj(2), class=cor)
# )
# add_epred_draws(object=m_prior6,
# newdata = mdata,
# re_formula = NA) %>%
# ggplot(aes(x=year, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(label=comma)
#
# add_epred_draws(object=m_prior6,
# newdata = mdata,
# re_formula = ~( 1 + y_num + acf_period | ward)) %>%
# ggplot(aes(x=year, y=.epred)) +
# stat_halfeye() +
# scale_y_log10(label=comma) +
# facet_wrap(ward~.)
#
# plot_counterfactual(model_data = overall_scaffold, model=m_prior6, outcome = inc_100k,
# population_denominator = population_without_inst_ship, re_formula = NA)
#
# plot_counterfactual(model_data = mdata, model=m_prior6, outcome = inc_100k,
# population_denominator = population_without_inst_ship, grouping_var = ward, ward,
# re_formula = ~( 1 + y_num + acf_period | ward))
Issue here is the non-centred parameterisation of the intercept prior… Feel like this is a more interpretable way to set priors… but will revert to centred parameterisation for the meantime.
# m_centered_prior <- brm(
# cases ~ 1 + y_num*acf_period + (1 + y_num*acf_period | ward) + offset(log(population_without_inst_ship)),
# data = mdata,
# family = negbinomial(),
# seed = 1234,
# chains = 4, cores = 4,
# prior = prior(normal(0,1000), class = Intercept) +
# prior(gamma(0.01, 0.01), class = shape) +
# prior(normal(0, 1), class = b) +
# prior(exponential(1), class=sd) +
# prior(lkj(2), class=cor),
# sample_prior = "only")
#
# plot(m_centered_prior)
#
# plot_counterfactual(model_data = overall_scaffold, model=m_centered_prior, outcome = inc_100k,
# population_denominator = population_without_inst_ship, re_formula = NA)
#
# plot_counterfactual(model_data = mdata, model=m_centered_prior, outcome = inc_100k,
# population_denominator = population_without_inst_ship, grouping_var = ward, ward,
# re_formula = ~( 1 + y_num*acf_period | ward))
Look at the mean and variance of counts (counts of pulmonary notifications are what we are predicting)
#Mean of counts per year
mean(mdata$cases)
[1] 48.32819
#variance of counts per year
var(mdata$cases)
[1] 915.5749
Quite a bit of over-dispersion here, so negative binomial distribution might be a better choice of distributional family than Poisson.
Fit the model with the data
m_pulmonary <- brm(
cases ~ 0 + Intercept + y_num*acf_period + (1 + y_num*acf_period | ward) + offset(log(population_without_inst_ship)),
data = mdata,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = prior(normal(0,1), class=b, coef = "Intercept") +
prior(gamma(0.01, 0.01), class = shape) +
prior(normal(0, 1), class = b) +
prior(exponential(1), class=sd) +
prior(lkj(4), class=cor),
control = list(adapt_delta = 0.9))
Compiling Stan program...
Start sampling
starting worker pid=5049 on localhost:11567 at 16:50:13.533
starting worker pid=5070 on localhost:11567 at 16:50:13.746
starting worker pid=5084 on localhost:11567 at 16:50:13.932
starting worker pid=5098 on localhost:11567 at 16:50:14.111
Error in serialize(data, node$con, xdr = FALSE) :
error writing to connection
Nicer version of trace plots for supplemental material
as_draws_df(m_pulmonary) %>%
bayesplot::mcmc_rank_overlay(pars = vars(b_Intercept:shape),
facet_args = list(ncol = 4)) +
scale_colour_scico_d(palette = "managua", name = "Chain") +
theme_ggdist()+
theme(panel.border = element_rect(colour = "grey78", fill=NA),
legend.position = "top")
Scale for colour is already present.
Adding another scale for colour, which will replace the existing scale.
Nicer version of table of parameters for supplement
summarise_draws(m_pulmonary) %>%
mutate(across(c(mean:ess_tail), comma, accuracy=0.01)) %>%
write_csv(here("figures/s1_table.csv"))
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `across(c(mean:ess_tail), comma, accuracy = 0.01)`.
Caused by warning:
! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.
# Previously
across(a:b, mean, na.rm = TRUE)
# Now
across(a:b, \(x) mean(x, na.rm = TRUE))
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
Summarise the posterior in graphical form
f1b <- plot_counterfactual(model_data = overall_scaffold, model = m_pulmonary,
population_denominator = population_without_inst_ship, outcome = inc_100k, grouping_var=NULL,
re_formula = NA)
f1b
Make this into a figure combined with the map of empirical data
f1a <- st_as_sf(left_join(ward_inc, glasgow_wards_1951)) %>%
filter(tb_type=="Pulmonary") %>%
ggplot() +
geom_sf(aes(fill=inc_100k), colour="grey98", lwd=0.01) +
facet_wrap(year~., ncol = 7) +
scale_fill_scico(name="CNR (per 100,000)",
palette = "acton", direction = -1) +
theme_grey() +
theme(legend.position = "top",
#legend.key.width = unit(1, "cm"),
legend.title.align = 0.5,
axis.text.x = element_blank(),
axis.text.y = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
panel.background = element_blank(),
legend.title = element_text(size=10))
Joining with `by = join_by(division, ward, ward_number)`Warning: The `legend.title.align` argument of `theme()` is deprecated as of ggplot2 3.5.0.
Please use theme(legend.title = element_text(hjust)) instead.
(f1a / f1b) + plot_annotation(tag_levels = "A")
ggsave(here("figures/f1.png"), width=7, height=8)
Summary of change in notifications numerically
overall_change <- summarise_change(model_data=overall_scaffold, model=m_pulmonary,
population_denominator=population_without_inst_ship, grouping_var=NULL, re_formula = NA)
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
#want to keep the summary estimates here
tokeep <- c("peak_summary", "level_summary", "slope_summary")
#summary measures in a table
overall_change %>%
keep((names(.) %in% tokeep)) %>%
bind_rows() %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
NA
NA
Numbers of pulmonary TB cases averted compared to counterfactual per year.
overall_pulmonary_counterf <- calculate_counterfactual(model_data = overall_scaffold, model=m_pulmonary, population_denominator = population_without_inst_ship)
Joining with `by = join_by(year, population_without_inst_ship, .draw)`Joining with `by = join_by(.draw)`
overall_pulmonary_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
Total pulmonary TB cases averted between 1958 and 1963
overall_pulmonary_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
What are the correlations between peak, level, and slope?
Plot the counterfactual at ward level
Summary of change in notifications at ward level
Calculate the counterfactual per ward
ward_pulmonary_counterf <- calculate_counterfactual(model_data = mdata, model=m_pulmonary,
population_denominator = population_without_inst_ship,
grouping_var = ward, re_formula=~(1 + y_num*acf_period | ward))
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.Joining with `by = join_by(year, population_without_inst_ship, .draw, ward)`Joining with `by = join_by(.draw, ward)`
ward_pulmonary_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
Overall counterfactual per ward
ward_pulmonary_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
Now we will model the extra-pulmonary TB notification rate. Struggling a bit with negative binomial model, so revert to Poisson.
m_extrapulmonary <- brm(
cases ~ 1 + y_num*acf_period + (1 + y_num*acf_period | ward) + offset(log(population_without_inst_ship)),
data = mdata_extrapulmonary,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = prior(normal(0,1000), class = Intercept) +
prior(gamma(0.01, 0.01), class = shape) +
prior(normal(0, 1), class = b) +
prior(exponential(1), class=sd) +
prior(lkj(2), class=cor))
Compiling Stan program...
Start sampling
starting worker pid=98124 on localhost:11567 at 11:02:29.576
starting worker pid=98138 on localhost:11567 at 11:02:29.840
starting worker pid=98152 on localhost:11567 at 11:02:30.070
starting worker pid=98167 on localhost:11567 at 11:02:30.303
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 0.000729 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 7.29 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 0.000445 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 4.45 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 0.000525 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 5.25 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 0.000231 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 2.31 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 20.64 seconds (Warm-up)
Chain 1: 8.876 seconds (Sampling)
Chain 1: 29.516 seconds (Total)
Chain 1:
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 20.57 seconds (Warm-up)
Chain 2: 8.787 seconds (Sampling)
Chain 2: 29.357 seconds (Total)
Chain 2:
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 20.32 seconds (Warm-up)
Chain 4: 8.993 seconds (Sampling)
Chain 4: 29.313 seconds (Total)
Chain 4:
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 21.638 seconds (Warm-up)
Chain 3: 8.729 seconds (Sampling)
Chain 3: 30.367 seconds (Total)
Chain 3:
Warning: Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
https://mc-stan.org/misc/warnings.html#bulk-ess
summary(m_extrapulmonary)
Family: negbinomial
Links: mu = log; shape = identity
Formula: cases ~ 1 + y_num * acf_period + (1 + y_num * acf_period | ward) + offset(log(population_without_inst_ship))
Data: mdata_extrapulmonary (Number of observations: 444)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Multilevel Hyperparameters:
~ward (Number of levels: 37)
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept) 0.33 0.06 0.23 0.46 1.00 1365 2322
sd(y_num) 0.02 0.01 0.00 0.04 1.01 384 1066
sd(acf_periodb.acf) 0.11 0.09 0.00 0.33 1.00 2084 1637
sd(acf_periodc.postMacf) 0.12 0.09 0.01 0.34 1.01 1322 1811
sd(y_num:acf_periodb.acf) 0.01 0.01 0.00 0.04 1.00 1909 1615
sd(y_num:acf_periodc.postMacf) 0.01 0.01 0.00 0.04 1.00 906 1267
cor(Intercept,y_num) -0.12 0.31 -0.66 0.53 1.00 1969 2669
cor(Intercept,acf_periodb.acf) -0.01 0.33 -0.64 0.62 1.00 5360 3024
cor(y_num,acf_periodb.acf) -0.00 0.33 -0.65 0.62 1.00 4082 2934
cor(Intercept,acf_periodc.postMacf) -0.07 0.32 -0.65 0.57 1.00 4841 2964
cor(y_num,acf_periodc.postMacf) -0.04 0.33 -0.64 0.59 1.00 3930 3058
cor(acf_periodb.acf,acf_periodc.postMacf) 0.03 0.33 -0.62 0.66 1.00 2814 2651
cor(Intercept,y_num:acf_periodb.acf) -0.01 0.33 -0.63 0.63 1.00 5389 3013
cor(y_num,y_num:acf_periodb.acf) -0.02 0.33 -0.62 0.60 1.00 3765 3248
cor(acf_periodb.acf,y_num:acf_periodb.acf) -0.08 0.35 -0.71 0.59 1.00 3816 3404
cor(acf_periodc.postMacf,y_num:acf_periodb.acf) 0.01 0.33 -0.62 0.63 1.00 3414 3206
cor(Intercept,y_num:acf_periodc.postMacf) -0.14 0.32 -0.69 0.51 1.00 3844 2636
cor(y_num,y_num:acf_periodc.postMacf) -0.05 0.33 -0.65 0.59 1.00 3222 2957
cor(acf_periodb.acf,y_num:acf_periodc.postMacf) 0.02 0.33 -0.59 0.64 1.00 2776 3075
cor(acf_periodc.postMacf,y_num:acf_periodc.postMacf) -0.08 0.35 -0.72 0.60 1.00 2433 2632
cor(y_num:acf_periodb.acf,y_num:acf_periodc.postMacf) 0.02 0.33 -0.61 0.64 1.00 2168 3141
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept -7.93 0.08 -8.08 -7.78 1.00 1561 2465
y_num -0.09 0.01 -0.11 -0.06 1.00 4286 2639
acf_periodb.acf -0.00 0.97 -1.94 1.87 1.00 2620 3110
acf_periodc.postMacf -0.33 0.40 -1.12 0.47 1.00 2470 2788
y_num:acf_periodb.acf -0.01 0.12 -0.25 0.23 1.00 2628 3187
y_num:acf_periodc.postMacf 0.02 0.04 -0.06 0.09 1.00 2251 2732
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 93.62 65.60 27.13 273.55 1.00 4326 3542
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot(m_extrapulmonary)
pp_check(m_extrapulmonary, type='ecdf_overlay')
Using 10 posterior draws for ppc type 'ecdf_overlay' by default.
Summarise in plot
ggsave(here("figures/s9.png"), width=10)
Saving 10 x 7 in image
Summarise numerically.
overall_change_extrapulmonary <- summarise_change(model_data=overall_scaffold, model=m_extrapulmonary,
population_denominator=population_without_inst_ship, grouping_var=NULL, re_formula = NA)
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
#want to keep the summary estimates here
tokeep <- c("peak_summary", "level_summary", "slope_summary")
#summary measures in a table
overall_change_extrapulmonary %>%
keep(names(.) %in% tokeep) %>%
bind_rows() %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
NA
Numbers of extra-pulmonary TB cases averted overall.
overall_ep_counterf <- calculate_counterfactual(model_data = mdata_extrapulmonary, model=m_extrapulmonary,
population_denominator = population_without_inst_ship)
Joining with `by = join_by(year, population_without_inst_ship, .draw)`Joining with `by = join_by(.draw)`
overall_ep_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
Total extrapulmonary TB cases averted between 1958 and 1963
overall_ep_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
datatable()
NA
NA
Make into Table 2
bind_rows(
overall_pulmonary_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(model = "PTB_ward"),
overall_pulmonary_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(model = "PTB_overall"),
overall_ep_counterf$counter_post %>%
mutate(across(c(cases_averted:cases_averted.upper, diff_inc100k:diff_inc100k.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(rr_inc100k:rr_inc100k.upper), number_format(accuracy = 0.01))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(model = "EPTB"),
overall_ep_counterf$counter_post_overall %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
mutate(model = "EPTB overall")
) %>%
select(model, year, diff_inc100k, diff_inc100k.lower:rr_inc100k.upper,
cases_averted:cases_averted.upper,
pct_change:pct_change.upper) %>%
transmute(model=model, year=year,
diff_cnr = glue("{diff_inc100k} ({diff_inc100k.lower} to {diff_inc100k.upper})"),
rr = glue("{rr_inc100k} ({rr_inc100k.lower} to {rr_inc100k.upper})"),
cases_averted = glue("{cases_averted} ({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue("{pct_change} ({pct_change.lower} to {pct_change.upper})")) %>%
write_csv(here("figures/table2.csv"))
Ward-level extra-pulmonary estimates in graphical form.
plot_counterfactual(model_data = mdata_extrapulmonary, model=m_extrapulmonary, outcome = inc_100k,
population_denominator = population_without_inst_ship, grouping_var = ward,re_formula =~(y_num*acf_period | ward),
ward) + scale_y_continuous(limits= c(0,75))
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Numerical summary.
ward_change_extrapulmonary <- summarise_change(model_data = mdata_extrapulmonary, model = m_extrapulmonary,
population_denominator = population_without_inst_ship, grouping_var=ward,
re_formula = ~(y_num*acf_period | ward))
`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw', 'y_num'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw'. You can override using the `.groups` argument.
#want to keep the summary estimates here
tokeep <- c("peak_summary", "level_summary", "slope_summary")
#summary measures in a table
ward_change_extrapulmonary %>%
keep(names(.) %in% tokeep) %>%
bind_rows() %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
NA
NA
NA
Fit the model
(Not rewritten the functions for this yet)
mdata_age_sex <- cases_by_age_sex %>%
filter(tb_type=="Pulmonary") %>%
mutate(acf_period = case_when(year %in% c(1950:1956) ~ "a. pre-acf",
year %in% c(1957) ~ "b. acf",
year %in% c(1958:1963) ~ "c. post-acf")) %>%
mutate(year2 = year+0.5) %>%
group_by(age, sex) %>%
mutate(y_num = row_number()) %>%
ungroup()
m_age_sex <- brm(
cases ~ y_num + (acf_period)*(age*sex) + (acf_period:y_num)*(age*sex),
data = mdata_age_sex,
family = negbinomial(),
seed = 1234,
chains = 4, cores = 4,
prior = prior(normal(0,1), class = Intercept) +
prior(gamma(0.01, 0.01), class = shape) +
prior(normal(0, 1), class = b))
Compiling Stan program...
Start sampling
starting worker pid=98428 on localhost:11567 at 11:05:18.993
starting worker pid=98442 on localhost:11567 at 11:05:19.345
starting worker pid=98462 on localhost:11567 at 11:05:20.507
starting worker pid=98476 on localhost:11567 at 11:05:20.732
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1:
Chain 1: Gradient evaluation took 6.2e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.62 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1:
Chain 1:
Chain 1: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2:
Chain 2: Gradient evaluation took 8.3e-05 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.83 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2:
Chain 2:
Chain 2: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3:
Chain 3: Gradient evaluation took 5.6e-05 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.56 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3:
Chain 3:
Chain 3: Iteration: 1 / 2000 [ 0%] (Warmup)
SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4:
Chain 4: Gradient evaluation took 5.8e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.58 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4:
Chain 4:
Chain 4: Iteration: 1 / 2000 [ 0%] (Warmup)
Chain 1: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 3: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 2: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 4: Iteration: 200 / 2000 [ 10%] (Warmup)
Chain 1: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 4: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 2: Iteration: 400 / 2000 [ 20%] (Warmup)
Chain 3: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 1: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 4: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 2: Iteration: 600 / 2000 [ 30%] (Warmup)
Chain 3: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 1: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 4: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 2: Iteration: 800 / 2000 [ 40%] (Warmup)
Chain 3: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 3: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 4: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 4: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 1: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 1: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 2: Iteration: 1000 / 2000 [ 50%] (Warmup)
Chain 2: Iteration: 1001 / 2000 [ 50%] (Sampling)
Chain 3: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 1: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 2: Iteration: 1200 / 2000 [ 60%] (Sampling)
Chain 4: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 3: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 1: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 2: Iteration: 1400 / 2000 [ 70%] (Sampling)
Chain 4: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 3: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 2: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 1: Iteration: 1600 / 2000 [ 80%] (Sampling)
Chain 4: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 3: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 2: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 1: Iteration: 1800 / 2000 [ 90%] (Sampling)
Chain 4: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 4:
Chain 4: Elapsed Time: 16.959 seconds (Warm-up)
Chain 4: 18.942 seconds (Sampling)
Chain 4: 35.901 seconds (Total)
Chain 4:
Chain 3: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 3:
Chain 3: Elapsed Time: 16.574 seconds (Warm-up)
Chain 3: 19.863 seconds (Sampling)
Chain 3: 36.437 seconds (Total)
Chain 3:
Chain 2: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 2:
Chain 2: Elapsed Time: 18.234 seconds (Warm-up)
Chain 2: 19.542 seconds (Sampling)
Chain 2: 37.776 seconds (Total)
Chain 2:
Chain 1: Iteration: 2000 / 2000 [100%] (Sampling)
Chain 1:
Chain 1: Elapsed Time: 18.018 seconds (Warm-up)
Chain 1: 20.185 seconds (Sampling)
Chain 1: 38.203 seconds (Total)
Chain 1:
summary(m_age_sex)
Family: negbinomial
Links: mu = log; shape = identity
Formula: cases ~ y_num + (acf_period) * (age * sex) + (acf_period:y_num) * (age * sex)
Data: mdata_age_sex (Number of observations: 224)
Draws: 4 chains, each with iter = 2000; warmup = 1000; thin = 1;
total post-warmup draws = 4000
Regression Coefficients:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept 4.42 0.11 4.20 4.64 1.00 1397 2401
y_num -0.17 0.03 -0.23 -0.11 1.00 1352 2186
acf_periodb.acf -0.02 0.99 -1.98 1.94 1.00 6206 2892
acf_periodc.postMacf -0.50 0.33 -1.15 0.15 1.00 2432 2868
age06_15 0.63 0.15 0.34 0.93 1.00 1844 2734
age16_25 1.85 0.13 1.60 2.12 1.00 1587 2376
age26_35 1.13 0.14 0.86 1.40 1.00 1700 2530
age36_45 0.28 0.15 -0.01 0.57 1.00 1880 2731
age46_55 -0.62 0.17 -0.97 -0.27 1.00 2170 2602
age56_65 -1.08 0.19 -1.46 -0.71 1.00 2551 2927
age65P -1.64 0.22 -2.07 -1.21 1.00 2964 3032
sexM 0.16 0.15 -0.13 0.46 1.00 1413 2537
age06_15:sexM -0.43 0.21 -0.83 -0.03 1.00 2045 2796
age16_25:sexM -0.58 0.18 -0.94 -0.23 1.00 1713 2556
age26_35:sexM -0.33 0.19 -0.70 0.04 1.00 1668 2681
age36_45:sexM 0.24 0.20 -0.16 0.62 1.00 1941 2649
age46_55:sexM 1.15 0.22 0.72 1.57 1.00 1947 2397
age56_65:sexM 1.13 0.24 0.67 1.59 1.00 2309 2597
age65P:sexM 1.00 0.26 0.47 1.50 1.00 2723 3163
acf_periodb.acf:age06_15 0.01 1.00 -1.95 1.95 1.00 7072 2973
acf_periodc.postMacf:age06_15 -0.59 0.51 -1.57 0.45 1.00 3737 3352
acf_periodb.acf:age16_25 0.04 0.98 -1.88 1.90 1.00 6437 2778
acf_periodc.postMacf:age16_25 0.74 0.42 -0.08 1.58 1.00 3367 3236
acf_periodb.acf:age26_35 0.05 1.01 -1.95 2.01 1.00 6205 2400
acf_periodc.postMacf:age26_35 0.66 0.43 -0.19 1.50 1.00 3365 3163
acf_periodb.acf:age36_45 0.04 1.03 -2.00 2.07 1.00 6355 2766
acf_periodc.postMacf:age36_45 0.75 0.46 -0.15 1.66 1.00 3567 3174
acf_periodb.acf:age46_55 0.06 0.98 -1.86 1.95 1.00 7318 2828
acf_periodc.postMacf:age46_55 0.86 0.48 -0.06 1.78 1.00 3668 2624
acf_periodb.acf:age56_65 0.03 0.99 -1.84 1.96 1.00 7096 2983
acf_periodc.postMacf:age56_65 0.63 0.52 -0.36 1.64 1.00 3886 2904
acf_periodb.acf:age65P 0.06 0.97 -1.81 1.96 1.00 7553 3091
acf_periodc.postMacf:age65P 0.97 0.54 -0.11 1.99 1.00 3746 2598
acf_periodb.acf:sexM -0.00 1.01 -2.00 1.96 1.00 6464 3241
acf_periodc.postMacf:sexM -0.06 0.37 -0.78 0.66 1.00 2954 3273
y_num:acf_periodb.acf -0.10 0.13 -0.36 0.16 1.00 5945 2661
y_num:acf_periodc.postMacf 0.04 0.04 -0.03 0.12 1.00 2137 2900
acf_periodb.acf:age06_15:sexM 0.01 0.98 -1.95 1.93 1.00 7778 3164
acf_periodc.postMacf:age06_15:sexM -0.58 0.63 -1.80 0.65 1.00 4705 2901
acf_periodb.acf:age16_25:sexM 0.01 0.96 -1.89 1.90 1.00 6657 2848
acf_periodc.postMacf:age16_25:sexM 0.65 0.53 -0.36 1.69 1.00 4256 3449
acf_periodb.acf:age26_35:sexM -0.01 1.01 -1.95 1.95 1.00 6234 2832
acf_periodc.postMacf:age26_35:sexM 0.40 0.52 -0.61 1.40 1.00 3754 3322
acf_periodb.acf:age36_45:sexM 0.00 0.98 -1.89 1.88 1.00 7630 2767
acf_periodc.postMacf:age36_45:sexM 0.10 0.55 -0.97 1.20 1.00 4806 3262
acf_periodb.acf:age46_55:sexM 0.00 0.99 -1.99 1.94 1.00 6639 3139
acf_periodc.postMacf:age46_55:sexM 0.66 0.54 -0.40 1.70 1.00 4437 2911
acf_periodb.acf:age56_65:sexM 0.02 0.97 -1.87 1.92 1.00 7289 2921
acf_periodc.postMacf:age56_65:sexM 0.34 0.57 -0.74 1.46 1.00 4341 3319
acf_periodb.acf:age65P:sexM 0.04 0.98 -1.86 1.95 1.00 6525 3240
acf_periodc.postMacf:age65P:sexM 0.27 0.60 -0.91 1.46 1.00 4599 3141
y_num:acf_perioda.preMacf:age06_15 0.02 0.04 -0.05 0.10 1.00 1656 2702
y_num:acf_periodb.acf:age06_15 0.15 0.13 -0.11 0.42 1.00 6384 2890
y_num:acf_periodc.postMacf:age06_15 0.08 0.05 -0.02 0.17 1.00 3217 3004
y_num:acf_perioda.preMacf:age16_25 0.12 0.03 0.06 0.19 1.00 1498 2372
y_num:acf_periodb.acf:age16_25 0.25 0.13 -0.01 0.50 1.00 6014 2846
y_num:acf_periodc.postMacf:age16_25 -0.04 0.04 -0.12 0.04 1.00 2736 3384
y_num:acf_perioda.preMacf:age26_35 0.15 0.03 0.08 0.22 1.00 1593 2508
y_num:acf_periodb.acf:age26_35 0.31 0.14 0.05 0.59 1.00 5080 2449
y_num:acf_periodc.postMacf:age26_35 0.02 0.04 -0.06 0.10 1.00 2836 2985
y_num:acf_perioda.preMacf:age36_45 0.17 0.04 0.10 0.24 1.00 1676 2526
y_num:acf_periodb.acf:age36_45 0.40 0.14 0.14 0.67 1.00 6032 3088
y_num:acf_periodc.postMacf:age36_45 0.06 0.04 -0.02 0.14 1.00 2986 3068
y_num:acf_perioda.preMacf:age46_55 0.19 0.04 0.11 0.28 1.00 1857 2753
y_num:acf_periodb.acf:age46_55 0.44 0.13 0.18 0.70 1.00 6515 2608
y_num:acf_periodc.postMacf:age46_55 0.09 0.04 0.01 0.18 1.00 3184 3159
y_num:acf_perioda.preMacf:age56_65 0.17 0.05 0.08 0.27 1.00 2082 2571
y_num:acf_periodb.acf:age56_65 0.39 0.13 0.14 0.64 1.00 6358 3037
y_num:acf_periodc.postMacf:age56_65 0.11 0.05 0.02 0.20 1.00 3033 3210
y_num:acf_perioda.preMacf:age65P 0.23 0.05 0.13 0.33 1.00 2645 2713
y_num:acf_periodb.acf:age65P 0.43 0.13 0.18 0.68 1.00 7083 3005
y_num:acf_periodc.postMacf:age65P 0.11 0.05 0.02 0.20 1.00 3171 2781
y_num:acf_perioda.preMacf:sexM 0.02 0.04 -0.06 0.09 1.00 1333 2367
y_num:acf_periodb.acf:sexM -0.04 0.14 -0.31 0.24 1.00 6215 3021
y_num:acf_periodc.postMacf:sexM -0.01 0.04 -0.09 0.06 1.00 2254 2895
y_num:acf_perioda.preMacf:age06_15:sexM 0.00 0.05 -0.10 0.10 1.00 1870 2862
y_num:acf_periodb.acf:age06_15:sexM 0.04 0.14 -0.23 0.33 1.00 6489 2838
y_num:acf_periodc.postMacf:age06_15:sexM 0.10 0.06 -0.01 0.21 1.00 3508 2947
y_num:acf_perioda.preMacf:age16_25:sexM -0.00 0.05 -0.09 0.09 1.00 1633 2322
y_num:acf_periodb.acf:age16_25:sexM 0.06 0.13 -0.21 0.32 1.00 5721 2812
y_num:acf_periodc.postMacf:age16_25:sexM -0.01 0.05 -0.11 0.09 1.00 3214 3072
y_num:acf_perioda.preMacf:age26_35:sexM -0.01 0.05 -0.10 0.08 1.00 1558 2600
y_num:acf_periodb.acf:age26_35:sexM 0.05 0.14 -0.23 0.33 1.00 5835 3066
y_num:acf_periodc.postMacf:age26_35:sexM -0.00 0.05 -0.10 0.09 1.00 2801 2922
y_num:acf_perioda.preMacf:age36_45:sexM -0.01 0.05 -0.11 0.08 1.00 1678 2542
y_num:acf_periodb.acf:age36_45:sexM 0.00 0.14 -0.26 0.27 1.00 5495 2932
y_num:acf_periodc.postMacf:age36_45:sexM -0.00 0.05 -0.10 0.10 1.00 3458 3304
y_num:acf_perioda.preMacf:age46_55:sexM -0.01 0.05 -0.11 0.10 1.00 1708 2607
y_num:acf_periodb.acf:age46_55:sexM -0.00 0.14 -0.28 0.28 1.00 5719 2840
y_num:acf_periodc.postMacf:age46_55:sexM -0.06 0.05 -0.17 0.04 1.00 3314 2999
y_num:acf_perioda.preMacf:age56_65:sexM 0.05 0.06 -0.06 0.17 1.00 1993 2399
y_num:acf_periodb.acf:age56_65:sexM 0.07 0.14 -0.19 0.35 1.00 6147 2930
y_num:acf_periodc.postMacf:age56_65:sexM 0.02 0.05 -0.09 0.12 1.00 3245 3092
y_num:acf_perioda.preMacf:age65P:sexM 0.00 0.06 -0.11 0.13 1.00 2469 3062
y_num:acf_periodb.acf:age65P:sexM 0.07 0.14 -0.20 0.34 1.01 5209 3126
y_num:acf_periodc.postMacf:age65P:sexM 0.01 0.06 -0.10 0.12 1.00 3625 2882
Further Distributional Parameters:
Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
shape 202.57 68.03 108.42 370.40 1.00 2430 2563
Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
plot(m_age_sex)
pp_check(m_age_sex, type='ecdf_overlay')
Using 10 posterior draws for ppc type 'ecdf_overlay' by default.
Summarise posterior
ggsave(here("figures/s12.png"), height=10)
Saving 7 x 10 in image
Calculate summary effects
#Peak
out_age_sex_1 <- crossing(mdata_age_sex %>%
select(y_num, age, sex) %>%
filter(y_num == 8),
acf_period = c("a. pre-acf", "b. acf"))
peak_draws_age_sex <- add_epred_draws(newdata = out_age_sex_1,
object = m_age_sex) %>%
group_by(.draw, age, sex) %>%
summarise(estimate = last(.epred)/first(.epred)) %>%
ungroup() %>%
mutate(measure = "RR.peak")
`summarise()` has grouped output by '.draw', 'age'. You can override using the `.groups` argument.
peak_summary_age_sex <- peak_draws_age_sex %>%
group_by(age, sex) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.peak")
#Level
out_age_sex_2 <- crossing(mdata_age_sex %>%
select(y_num, age, sex) %>%
filter(y_num == 9),
acf_period = c("a. pre-acf", "c. post-acf"))
level_draws_age_sex <- add_epred_draws(newdata = out_age_sex_2,
object = m_age_sex) %>%
arrange(y_num, .draw) %>%
group_by(.draw, age, sex) %>%
summarise(estimate = last(.epred)/first(.epred)) %>%
ungroup() %>%
mutate(measure = "RR.level")
`summarise()` has grouped output by '.draw', 'age'. You can override using the `.groups` argument.
level_summary_age_sex <- level_draws_age_sex %>%
group_by(age, sex) %>%
mean_qi(estimate) %>%
mutate(measure = "RR.level")
#Slope
out_age_sex_3 <- crossing(mdata_age_sex %>%
select(y_num, age, sex) %>%
filter(y_num %in% c(9,14)),
acf_period = c("a. pre-acf", "c. post-acf"))
slope_draws_age_sex <- add_epred_draws(newdata = out_age_sex_3,
object = m_age_sex) %>%
arrange(y_num) %>%
ungroup() %>%
group_by(.draw, y_num, age, sex) %>%
summarise(slope = last(.epred)/first(.epred)) %>%
ungroup() %>%
group_by(.draw, age, sex) %>%
summarise(estimate = last(slope)/first(slope)) %>%
mutate(measure = "RR.slope")
`summarise()` has grouped output by '.draw', 'y_num', 'age'. You can override using the `.groups` argument.`summarise()` has grouped output by '.draw', 'age'. You can override using the `.groups` argument.
slope_summary_age_sex <- slope_draws_age_sex %>%
group_by(age, sex) %>%
median_qi(estimate) %>%
mutate(measure = "RR.slope")
Numerical summary of these summary results
bind_rows(
peak_summary_age_sex, level_summary_age_sex, slope_summary_age_sex
) %>%
mutate(across(c(estimate:.upper), number, accuracy=0.01)) %>%
select(measure, everything()) %>%
datatable()
NA
NA
NA
As a figure
peak_g_age_sex <- peak_summary_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_hline(aes(yintercept=1), linetype=2)+
geom_pointrange(aes(x=age, y=estimate, ymin=.lower, ymax=.upper, group=sex, colour=sex, shape=sex),
position = position_dodge(width = 0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
labs(x="",
y="Relative rate (95% UI)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
#level plot
level_g_age_sex <- level_summary_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_hline(aes(yintercept=1), linetype=2)+
geom_pointrange(aes(x=age, y=estimate, ymin=.lower, ymax=.upper, group=sex, colour=sex, shape=sex),
position = position_dodge(width = 0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
labs(x="",
y="Relative rate (95% UI)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
#slope plot
slope_g_age_sex <- slope_summary_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_hline(aes(yintercept=1), linetype=2)+
geom_pointrange(aes(x=age, y=estimate, ymin=.lower, ymax=.upper, group=sex, colour=sex, shape=sex),
position = position_dodge(width = 0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
labs(x="",
y="Relative rate (95% UI)") +
theme_ggdist() +
theme(legend.position = "bottom",
panel.border = element_rect(colour = "grey78", fill=NA))
counterfact_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex) %>%
mutate(acf_period = "a. pre-acf")) %>%
filter(year>1957) %>%
select(year, age, sex, .draw, .epred_counterf = .epred)
Adding missing grouping variables: `year2`, `y_num`, `acf_period`, `.row`
#Calcuate predicted number of cases per draw, then summarise.
post_change_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex, acf_period)) %>%
filter(year>1957) %>%
ungroup() %>%
select(year, age, sex, .draw, .epred)
#for the overall period
counterfact_overall_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex) %>%
mutate(acf_period = "a. pre-acf")) %>%
filter(year>1957) %>%
select(age, sex, .draw, .epred) %>%
group_by(age, sex, .draw) %>%
summarise(.epred_counterf = sum(.epred)) %>%
mutate(year = "Overall (1958-1963)")
Adding missing grouping variables: `year`, `year2`, `y_num`, `acf_period`, `.row``summarise()` has grouped output by 'age', 'sex'. You can override using the `.groups` argument.
#Calcuate incidence per draw, then summarise.
post_change_overall_age_sex <-
add_epred_draws(object = m_age_sex,
newdata = mdata_age_sex %>%
select(year, year2, y_num, age, sex, acf_period)) %>%
filter(year>1957) %>%
select(age, sex, .draw, .epred) %>%
group_by(.draw, age, sex) %>%
summarise(.epred = sum(.epred))
Adding missing grouping variables: `year`, `year2`, `y_num`, `acf_period`, `.row``summarise()` has grouped output by '.draw', 'age'. You can override using the `.groups` argument.
counter_post_overall_age_sex <-
left_join(counterfact_overall_age_sex, post_change_overall_age_sex) %>%
mutate(cases_averted = .epred_counterf-.epred,
pct_change = (.epred - .epred_counterf)/.epred_counterf) %>%
group_by(age, sex) %>%
mean_qi(cases_averted, pct_change) %>%
ungroup() %>%
mutate(year = "Overall (1958-1963)")
Joining with `by = join_by(age, sex, .draw)`
age_sex_txt <- counter_post_overall_age_sex %>%
mutate(across(c(cases_averted:cases_averted.upper), number_format(accuracy = 0.1, big.mark = ","))) %>%
mutate(across(c(pct_change:pct_change.upper), percent, accuracy=0.1)) %>%
transmute(year = as.character(year),
sex = sex,
age = age,
cases_averted = glue::glue("{cases_averted}\n({cases_averted.lower} to {cases_averted.upper})"),
pct_change = glue::glue("{pct_change}\n({pct_change.lower} to {pct_change.upper})"))
age_sex_txt %>% datatable()
NA
NA
counterfactual_g_age_sex <- counter_post_overall_age_sex %>%
mutate(sex = case_when(sex=="M" ~ "Male",
sex=="F" ~ "Female")) %>%
mutate(age = case_when(age=="00_05" ~ "0 to 5y",
age=="06_15" ~ "06 to 15y",
age=="16_25" ~ "16 to 25y",
age=="26_35" ~ "26 to 35y",
age=="36_45" ~ "36 to 45y",
age=="46_55" ~ "46 to 55y",
age=="56_65" ~ "56 to 65y",
age=="65+" ~ "65 & up y")) %>%
ggplot() +
geom_pointrange(aes(x = age, y=cases_averted, ymin=cases_averted.lower, ymax=cases_averted.upper, colour=sex, shape=sex), position=position_dodge(width=0.5)) +
scale_colour_manual(values = c("#CD7AC5", "cadetblue3"), name="") +
scale_shape(name="") +
scale_y_continuous(labels = comma) +
labs(x="",
y="Number (95% UI)",
colour="") +
theme_ggdist() +
theme(panel.border = element_rect(colour = "grey78", fill=NA),
legend.position = "bottom")
counterfactual_g_age_sex
Join together for Figure 3.
(peak_g_age_sex + level_g_age_sex) / (slope_g_age_sex + counterfactual_g_age_sex) + plot_annotation(tag_levels = "A") + plot_layout(guides = "collect") & theme(legend.position = "bottom")
ggsave(here("figures/f3.png"), width = 12, height=8)
NA
NA
Was uptake of CXR at division level associated with greated impact?
{r} # # m_division <- brm( # cases ~ 1 + y_num*acf_period + (1 + y_num*acf_period | ward) + offset(log(population_without_inst_ship)), # data = mdata, # family = negbinomial(), # seed = 1234, # chains = 4, cores = 4, # prior = prior(normal(0,1), class = Intercept) + # prior(gamma(0.01, 0.01), class = shape) + # prior(normal(0, 1), class = b) + # prior(exponential(1), class=sd) + # prior(lkj(4), class=cor), # control = list(adapt_delta = 0.9)) # #